#!/usr/bin/python
# -*- coding: UTF-8 -*-

import string
import json
import time
import requests
import MySQLdb
import random
import re

# MySQL connection to the sink database for scraped news items.
# NOTE(review): credentials are hard-coded in source — move to config/env
# and rotate the password, since this file has been checked in with them.
conn = MySQLdb.connect("10.10.87.38", "crm", "crm@2015", "sina_news", charset='utf8')
cursor = conn.cursor()
# JSONP feed endpoint of Sina's 7x24 live news stream (zhibo_id=152).
# $jQueryId and $datetime imitate the browser's jQuery JSONP callback and
# cache-buster; $tag_id selects the news category. page_size is sent under
# both spellings ("page_size" and "pagesize") — presumably mirroring the
# request the real page issues; TODO confirm whether one can be dropped.
template_url = string.Template(
    'http://zhibo.sina.com.cn/api/zhibo/feed?callback=jQuery$jQueryId&page=1&page_size=$page_size&zhibo_id=152&tag_id=$tag_id&dire=f&dpc=1&pagesize=$page_size&_=$datetime')
# Category display name (Chinese) -> numeric tag id understood by the feed API.
tag_ids = {u'A股': 10, u'宏观': 1, u'行业': 2, u'公司': 3, u'数据': 4, u'市场': 5, u'观点': 6, u'央行': 7, u'其他': 8,}
# Browser-like request headers. The Cookie value was captured from a real
# session — NOTE(review): it looks required to get past anti-crawler checks
# and will expire; confirm and refresh when requests start failing.
headers = {
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'Accept': '*/*',
    #   'Referer'           :   'http://finance.sina.com.cn/7x24/?tag=10',
    'Connection': 'keep-alive',
    'Cookie': 'U_TRS1=000000d5.5a546170.5cb7de02.83b7c4e0; U_TRS2=000000d5.5a616170.5cb7de02.256bb0da; UOR=www.baidu.com,blog.sina.com.cn,; SINAGLOBAL=114.114.114.114_1555553794.454804; Apache=114.114.114.213_1555553794.454805; ULV=1555553794485:1:1:1:114.114.114.114_1555553794.454805:; SCF=AhOLahPmRlTviyZ4YQHaxRNdunCqZL3kO2SBnELkwjeVg8ZMdSXgud0IsBd4CaJIt5s-9YmaaRxgNVK4w6koPXE.; ULOGIN_IMG=gz-d89f6db983d2c25da42c59504991a4867f53; sso_info=v02m6alo5qztLSNk4S5jJOQs46TnKadlqWkj5OEuI6DnLCOg4y1jbOMwA==; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhM3uQ2UWBVDQgNwIoS4aG35NHD95Qp1hnNehn0SKM0Ws4Dqcjci--Xi-zRiKn7i--fiKysi-8Wi--fi-2Xi-2Ni--RiK.7iKyhi--fiKnfiK.Xi--fi-82iK.7; _s_upa=5; lxlrttp=1556243090; NEWSCENTER=522b9f1b6a2f61766931ac50242bed94; SUB=_2A25x5Y0UDeRhGedG41UR-C3JzD-IHXVSkvncrDV_PUNbm9BeLWitkW9NUOtWwD6pEwnHVFGGf0Y42aAcKr49dHwM; ALF=1589850308',
}

# Crawl each news category once, strip the JSONP wrapper, and upsert every
# feed item into sina_news.news. Best-effort: failures on one category or one
# row are logged and skipped so the remaining work still runs.
for news_type, tag_id in tag_ids.items():
    # Randomize the Referer tag so requests look like organic page browsing.
    # list() is needed because dict.values() is not indexable on Python 3.
    headers['Referer'] = 'http://finance.sina.com.cn/7x24/?tag=%s' % random.choice(list(tag_ids.values()))
    datetime = int(1000 * time.time())  # millisecond timestamp, doubles as JSONP callback suffix
    crawlurl = template_url.substitute(datetime=datetime, jQueryId="111207214587420816325_%s" % datetime, tag_id=tag_id,
                                       page_size=20)
    try:
        text = requests.get(crawlurl, timeout=2, headers=headers).text
        # Response is JSONP: "try{jQuery...(<json>);}catch(e){};" — strip the
        # callback prefix and suffix, then parse the remaining JSON payload.
        news = json.loads(re.sub(r'^try[^\(]*\(|\);}catch\(e\){};$', '', text))['result']['data']['feed']['list']
    except Exception as e:
        print(str(e))
        continue

    for data in news:
        unique_id = data['id']
        rich_text = data['rich_text']
        create_time = data['create_time']
        # Parameterized upsert: rich_text comes straight from the web, so the
        # previous string-interpolated SQL was injectable and broke on quotes.
        # The driver escapes each %s placeholder value safely.
        mysql_command = ("insert into sina_news.news (id,news_type,create_time,rich_text)"
                         " values (%s,%s,%s,%s)"
                         " on duplicate key update news_type=%s, create_time=%s, rich_text=%s")
        params = (unique_id, news_type, create_time, rich_text,
                  news_type, create_time, rich_text)
        try:
            cursor.execute(mysql_command, params)
            conn.commit()
        except Exception as e:
            # Log the failing statement and error, then keep processing rows.
            print(mysql_command)
            print(str(e))
